%% ====== SETTINGS ======
% Cross-validation layout: seed0..2 and, inside each seed, fold0..4
numSeeds = 3;
numFolds = 5;

% Ions (column offsets) included in the analysis and the presence/absence cutoff
ionsUse = [1 2 3];
thresh  = 1.5e-10;

% Range thresholds for the multi-class confusion matrices and the
% range-grouped box plots
r1 = 5e-7;
r2 = 1e-3;

% Histogram settings: bins of width binWidth covering errors in [0, eMaxPlot]
binWidth = 20;
eMaxPlot = 200;
numBins  = eMaxPlot / binWidth;

%% ====== LOAD DATA (same structure as before) ======
% allData{s+1, f+1} holds the matrix read from seed<s>/fold<f>/test_true_pred.csv;
% the cell is left as [] (with a warning) when that file is not found.
allData = cell(numSeeds, numFolds);
for s = 0:numSeeds-1
    seedDir = sprintf('seed%d', s);
    for f = 0:numFolds-1
        filePath = fullfile(seedDir, sprintf('fold%d', f), 'test_true_pred.csv');
        if ~exist(filePath, 'file')
            warning('Missing file: %s', filePath);
            allData{s+1, f+1} = [];
            continue;
        end
        allData{s+1, f+1} = readmatrix(filePath);
    end
end

%% ====== ABSOLUTE % ERROR (keep original logic for <=/ > thresh) ======
% absError{s,f} is N x 6 and holds 100*|true - pred| / true wherever the true
% and predicted value lie on the same side of thresh; every other entry is NaN.
absError = cell(size(allData));
[nSeedCells, nFoldCells] = size(allData);
for s = 1:nSeedCells
    for f = 1:nFoldCells
        D = allData{s,f};
        if isempty(D)
            absError{s,f} = [];
            continue;
        end

        trueVals = D(:,1:6);     % columns 1-6 are read as the true values
        predVals = D(:,7:12);    % columns 7-12 are read as the predictions

        maskLow  = (trueVals <= thresh) & (predVals <= thresh);
        maskHigh = (trueVals >  thresh) & (predVals >  thresh);
        sameSide = maskLow | maskHigh;   % both masks use the same formula

        err = nan(size(trueVals));
        err(sameSide) = 100*abs(trueVals(sameSide) - predVals(sameSide)) ./ trueVals(sameSide);
        absError{s,f} = err;
    end
end

%% ====== AGGREGATE (ALL FOLDS) VECTORS FOR ions 1–3 ======
% Stack the error, true and predicted values of the selected ions from every
% available (seed, fold) into three long column vectors.
errAll  = [];
trueAll = [];
predAll = [];
for s = 1:size(allData,1)
    for f = 1:size(allData,2)
        D = allData{s,f};
        A = absError{s,f};
        if ~(isempty(D) || isempty(A))
            E = A(:, ionsUse);
            T = D(:, ionsUse);
            P = D(:, ionsUse + 6);     % predictions sit 6 columns after the true values
            errAll  = vertcat(errAll,  E(:));
            trueAll = vertcat(trueAll, T(:));
            predAll = vertcat(predAll, P(:));
        end
    end
end

%% ====== RANK FOLDS AND PICK BEST-5 (minimize Q3 and upper whisker across ions) ======
% For every available (seed, fold) compute, per selected ion, the third
% quartile (Q3) and the upper box-plot whisker of the absolute % error.
% rankRows columns (nI = numel(ionsUse)):
% [s f q3_1..q3_nI uw_1..uw_nI maxQ3 maxUW meanQ3 N]
nI = numel(ionsUse);
rankRows = zeros(0, 2*nI + 6);
for s = 1:size(absError,1)
    for f = 1:size(absError,2)
        E = absError{s,f};
        if isempty(E), continue; end

        q3 = nan(1, nI);
        uw = nan(1, nI);
        Nsum = 0;                             % usable error values over all selected ions

        for k = 1:nI
            e = E(:, ionsUse(k));
            m = isfinite(e) & (e >= 0);       % drop NaN/Inf and negative errors
            Nsum = Nsum + sum(m);
            if any(m)
                ek  = e(m);
                q1k = prctile(ek, 25);
                q3k = prctile(ek, 75);
                iqrk = q3k - q1k;
                fence = q3k + 1.5*iqrk;
                q3(k) = q3k;
                uw(k) = max( ek(ek <= fence) );   % upper whisker (per MATLAB boxplot rule)
            else
                q3(k) = inf; uw(k) = inf;         % penalize missing
            end
        end

        maxQ3  = max(q3);
        maxUW  = max(uw);
        meanQ3 = mean(q3, 'omitnan');

        rankRows(end+1, :) = [s, f, q3, uw, maxQ3, maxUW, meanQ3, Nsum]; %#ok<SAGROW>
    end
end

if isempty(rankRows)
    error('No valid folds found to rank.');
end

% Sort by: smallest worst-ion Q3, then smallest worst-ion whisker, then mean Q3.
% The second argument of sortrows is a list of COLUMN INDICES, so the three
% keys must be given as [1 2 3]; [1 1 1] would sort on the first key only and
% leave ties (e.g. several folds with maxQ3 = Inf) unresolved.
keyCols = 2 + 2*nI + (1:3);                   % [maxQ3 maxUW meanQ3]
[~, ord] = sortrows(rankRows(:, keyCols), [1 2 3]);
ranked = rankRows(ord, :);

% Take top 5 (or fewer if not enough)
K = min(5, size(ranked,1));
best5 = ranked(1:K, :);  % keep [s f ...] structure

% Header and row formats are assembled from the number of ions so the table
% stays aligned with rankRows when ionsUse changes (identical text for 1:3).
hdrFmt = ['  s  f ', sprintf('  Q3_%d', ionsUse), ' |', sprintf('  UW_%d', ionsUse), ...
          ' |  maxQ3  maxUW  meanQ3    N\n'];
rowFmt = [' %2d %2d ', repmat(' %6.2f', 1, nI), ' |', repmat(' %6.2f', 1, nI), ...
          ' | %6.2f %6.2f %7.2f %5d\n'];

fprintf('\n=== Best %d (seed,fold) by min max-Q3 then max-whisker (ions 1–3) ===\n', K);
fprintf(hdrFmt);
for i = 1:K
    % print 0-based seed/fold, then Q3s, whiskers, maxQ3, maxUW, meanQ3, N
    fprintf(rowFmt, best5(i,1)-1, best5(i,2)-1, best5(i,3:end));
end
fprintf('Note: seeds/folds shown are 0-based above; indices in arrays remain 1-based.\n\n');

%% ====== BUILD "BEST-5" AGGREGATES ======
% Same stacking as for ALL folds, restricted to the (seed, fold) pairs listed
% in best5, plus per-ion true/predicted vectors for the confusion matrices.
nIons = numel(ionsUse);
errBest  = [];
trueBest = [];
predBest = [];
T_ion_best = cell(1, nIons);   % cell() already fills every entry with []
P_ion_best = cell(1, nIons);

for i = 1:K
    s = best5(i,1);
    f = best5(i,2);
    D  = allData{s,f};
    Ea = absError{s,f};
    if isempty(D) || isempty(Ea), continue; end

    E = Ea(:, ionsUse);
    T = D(:, ionsUse);
    P = D(:, ionsUse + 6);

    errBest  = vertcat(errBest,  E(:));
    trueBest = vertcat(trueBest, T(:));
    predBest = vertcat(predBest, P(:));

    % Per-ion pairs for the confusion matrices; rows with a non-finite true
    % or predicted value are dropped
    for j = 1:nIons
        tk = D(:, ionsUse(j));
        pk = D(:, ionsUse(j) + 6);
        v  = isfinite(tk) & isfinite(pk);
        T_ion_best{j} = vertcat(T_ion_best{j}, tk(v));
        P_ion_best{j} = vertcat(P_ion_best{j}, pk(v));
    end
end

%% ====== HELPER: histogram plotter ======
% NOTE(review): MATLAB releases before R2024a only accept local functions at
% the END of a script file; on those releases this definition has to be moved
% below the last section — confirm the target release.
function plot_err_hist(eVec, lbl, binWidth, eMaxPlot)
%PLOT_ERR_HIST Bar histogram of percentage errors in a new figure.
%   plot_err_hist(eVec, lbl, binWidth, eMaxPlot) bins the finite values of
%   eVec that lie in [0, eMaxPlot] into bins of width binWidth and draws each
%   bin as a percentage of ALL finite values of eVec, so values outside
%   [0, eMaxPlot] are not drawn but still count in the denominator.
%   lbl is appended to the figure name. eMaxPlot must be a positive multiple
%   of binWidth.

    % Validate the binning up front instead of failing later inside zeros()
    numBins = round(eMaxPlot/binWidth);
    if ~(isscalar(numBins) && isfinite(numBins) && numBins >= 1 && ...
            abs(numBins*binWidth - eMaxPlot) <= 1e-9*abs(eMaxPlot))
        error('plot_err_hist:badBins', ...
            'eMaxPlot (%g) must be a positive multiple of binWidth (%g).', eMaxPlot, binWidth);
    end

    eVec = eVec(:);
    valid = isfinite(eVec);
    totalCount = sum(valid);
    eClip = eVec(valid & (eVec >= 0) & (eVec <= eMaxPlot));

    counts = zeros(numBins,1);
    if ~isempty(eClip)
        % A value equal to eMaxPlot would land in bin numBins+1; clamp it
        % into the last bin.
        idx = min(floor(eClip/binWidth) + 1, numBins);
        counts = accumarray(idx, 1, [numBins 1]);
    end
    percentCounts = (counts / max(totalCount,1)) * 100;   % max() avoids 0/0 on empty input
    centers = ((1:numBins) - 0.5) * binWidth;

    figure('Name',sprintf('Histogram of %% Error — %s', lbl));
    bar(centers, percentCounts, 'EdgeColor','black');
    xMax = max(300, eMaxPlot);       % keep the 0..300 axis, widen it only if bars would be cut off
    xlim([0 xMax]); xticks(0:20:xMax);
    set(gcf,'Units','centimeters','Position',[5 5 15 12]);
    ax = gca; ax.YLimitMethod = "padded"; ax.FontSize = 15; ax.LineWidth = 3;
    ylim([0 80]); box off;
end

%% ====== HISTOGRAMS: ALL vs BEST-5 ======
% One figure per data set, ALL folds first, then the best-5 subset.
histSets = {errAll,  'ALL folds'; ...
            errBest, 'Best-5'};
for iSet = 1:size(histSets,1)
    plot_err_hist(histSets{iSet,1}, histSets{iSet,2}, binWidth, eMaxPlot);
end

%% ====== CONFUSION MATRICES: helper bits ======
% presenceLabel: values -> categorical 'Absent' (x <= thresh) / 'Present' (x > thresh).
% The anonymous functions capture thresh, r1 and r2 as they are at this point.
presenceLabel = @(x) categorical(x > thresh, [false true], {'Absent','Present'});

% Class names are generated from thresh, r1 and r2 so the labels always state
% the boundaries toRangeCat really applies (fixed text such as '(1e-10,1e-6]'
% does not follow the configured values).
classOrder = { ...
    ['\leq' sprintf('%g', thresh)], ...
    sprintf('(%g,%g]', thresh, r1), ...
    sprintf('(%g,%g]', r1, r2), ...
    ['>' sprintf('%g', r2)]};

% toRangeCat: values -> ordinal categorical with the four classes above.
% Exactly one of the four terms is non-zero for any non-NaN x, so the sum is
% the class index 1..4.
toRangeCat = @(x) categorical( ...
    1*(x <= thresh) + ...
    2*((x > thresh) & (x <= r1)) + ...
    3*((x > r1)    & (x <= r2)) + ...
    4*(x > r2), ...
    1:4, classOrder, 'Ordinal', true);

% Use confusionchart when it is found on the path, else fall back to imagesc
useCC = (exist('confusionchart','file') == 2);

%% ====== CONFUSION: Presence/Absence per ion — ALL ======
% Collect, per selected ion, the true/predicted values of every available
% (seed, fold); rows with a non-finite true or predicted value are dropped.
nIons = numel(ionsUse);
T_ion_all = cell(1, nIons);
P_ion_all = cell(1, nIons);
for s = 1:size(allData,1)
    for f = 1:size(allData,2)
        D = allData{s,f};
        if isempty(D), continue; end
        for k = 1:nIons
            t_k = D(:, ionsUse(k));
            p_k = D(:, ionsUse(k)+6);
            v = isfinite(t_k) & isfinite(p_k);
            T_ion_all{k} = [T_ion_all{k}; t_k(v)];
            P_ion_all{k} = [P_ion_all{k}; p_k(v)];
        end
    end
end

% One figure per ion: confusionchart when available, otherwise a labelled
% imagesc of the confusionmat counts.
paNames = {'Absent','Present'};
for k = 1:nIons
    ion  = ionsUse(k);
    T_pa = presenceLabel(T_ion_all{k});
    P_pa = presenceLabel(P_ion_all{k});
    figure('Name', sprintf('Confusion (Presence) — ALL — Ion %d', ion));
    ttl = sprintf('Presence (thr=%.1e) — ALL — Ion %d',thresh,ion);

    if ~useCC
        C = confusionmat(T_pa, P_pa);
        imagesc(C); axis equal tight; colorbar;
        set(gca,'XTick',1:2,'XTickLabel',paNames, 'YTick',1:2,'YTickLabel',paNames);
        xlabel('Predicted'); ylabel('True');
        title(ttl);
        for i = 1:2
            for j = 1:2
                text(j,i,num2str(C(i,j)),'HorizontalAlignment','center','FontWeight','bold');
            end
        end
        continue;
    end

    cc = confusionchart(T_pa, P_pa);
    if isprop(cc,'Title'),  cc.Title  = ttl;         end
    if isprop(cc,'XLabel'), cc.XLabel = 'Predicted'; end
    if isprop(cc,'YLabel'), cc.YLabel = 'True';      end
    try
        cc.Normalization = 'row-normalized';
        set(cc,'RowSummary','row-normalized','ColumnSummary','column-normalized');
    catch
        % best effort: keep the chart as created if these settings are rejected
    end
end

%% ====== CONFUSION: Presence/Absence per ion — BEST-5 ======
% Same per-ion presence/absence figures as for ALL, built from the
% T_ion_best / P_ion_best vectors.
paNames = {'Absent','Present'};
for k = 1:numel(ionsUse)
    ion  = ionsUse(k);
    T_pa = presenceLabel(T_ion_best{k});
    P_pa = presenceLabel(P_ion_best{k});
    figure('Name', sprintf('Confusion (Presence) — BEST-5 — Ion %d', ion));
    ttl = sprintf('Presence (thr=%.1e) — BEST-5 — Ion %d',thresh,ion);

    if ~useCC
        % Fallback: raw counts as an image with the count written in each cell
        C = confusionmat(T_pa, P_pa);
        imagesc(C); axis equal tight; colorbar;
        set(gca,'XTick',1:2,'XTickLabel',paNames, 'YTick',1:2,'YTickLabel',paNames);
        xlabel('Predicted'); ylabel('True');
        title(ttl);
        for i = 1:2
            for j = 1:2
                text(j,i,num2str(C(i,j)),'HorizontalAlignment','center','FontWeight','bold');
            end
        end
        continue;
    end

    cc = confusionchart(T_pa, P_pa);
    if isprop(cc,'Title'),  cc.Title  = ttl;         end
    if isprop(cc,'XLabel'), cc.XLabel = 'Predicted'; end
    if isprop(cc,'YLabel'), cc.YLabel = 'True';      end
    try
        cc.Normalization = 'row-normalized';
        set(cc,'RowSummary','row-normalized','ColumnSummary','column-normalized');
    catch
        % best effort: keep the chart as created if these settings are rejected
    end
end

%% ====== CONFUSION: 4-class ranges — ALL vs BEST-5 ======
% Pool the per-ion vectors of all selected ions, then draw one range-class
% confusion figure for ALL folds and one for the best-5 subset.
T_all_ALL = vertcat(T_ion_all{:});
P_all_ALL = vertcat(P_ion_all{:});
T_all_BST = vertcat(T_ion_best{:});
P_all_BST = vertcat(P_ion_best{:});

rgFigNames = {'Confusion (4-class ranges) — ALL', ...
              'Confusion (4-class ranges) — BEST-5'};
rgTitles   = {'Concentration Ranges — ALL (Ions 1–3)', ...
              'Concentration Ranges — BEST-5 (Ions 1–3)'};
rgTrue = {T_all_ALL, T_all_BST};
rgPred = {P_all_ALL, P_all_BST};

for iSet = 1:2      % 1 = ALL, 2 = BEST-5
    figure('Name', rgFigNames{iSet});
    T_rg = toRangeCat(rgTrue{iSet});
    P_rg = toRangeCat(rgPred{iSet});
    if useCC
        cc = confusionchart(T_rg, P_rg);
        if isprop(cc,'Title'),  cc.Title  = rgTitles{iSet}; end
        if isprop(cc,'XLabel'), cc.XLabel = 'Predicted';    end
        if isprop(cc,'YLabel'), cc.YLabel = 'True';         end
        try
            cc.Normalization = 'row-normalized';
            set(cc,'RowSummary','row-normalized','ColumnSummary','column-normalized');
        catch
            % best effort: keep the chart as created if these settings are rejected
        end
    else
        % Fallback: raw counts as an image with the count written in each cell
        C = confusionmat(T_rg, P_rg);
        imagesc(C); axis equal tight; colorbar;
        set(gca,'XTick',1:4,'XTickLabel',classOrder,'YTick',1:4,'YTickLabel',classOrder);
        xlabel('Predicted'); ylabel('True'); title(rgTitles{iSet});
        for i = 1:4
            for j = 1:4
                text(j,i,num2str(C(i,j)),'HorizontalAlignment','center','FontWeight','bold');
            end
        end
    end
end

%% ====== BOX PLOTS: aggregated by TRUE range — ALL & BEST-5 side-by-side in ONE FIGURE ======
% Fallback values, used only when the variables are not in the workspace
if ~exist('thresh','var'); thresh = 1.5e-10; end
if ~exist('r1','var');     r1     = 1e-6;    end
if ~exist('r2','var');     r2     = 1e-3;    end

% Tick labels are generated from thresh, r1 and r2 so they always state the
% boundaries rangeClass really applies, whatever values are configured.
rangeLabels = { ...
    ['\leq' sprintf('%g', thresh)], ...
    sprintf('(%g,%g]', thresh, r1), ...
    sprintf('(%g,%g]', r1, r2), ...
    ['>' sprintf('%g', r2)]};
rangeClass = @(x) categorical( ...
    1*(x <= thresh) + ...
    2*((x > thresh) & (x <= r1)) + ...
    3*((x > r1)    & (x <= r2)) + ...
    4*(x > r2), ...
    1:4, rangeLabels, 'Ordinal', true);

% Masks: keep finite, non-negative errors whose true value is finite
maskA = isfinite(errAll)  & (errAll  >= 0) & isfinite(trueAll);
maskB = isfinite(errBest) & (errBest >= 0) & isfinite(trueBest);

% Group indices 1..4 (fixed order) for each datapoint
gA = rangeClass(trueAll(maskA)); gA_idx = double(gA);
gB = rangeClass(trueBest(maskB)); gB_idx = double(gB);

% Unique groups present in each set (keep numeric 1..4); boxplot needs one
% position per group that actually occurs
uA = unique(gA_idx(:))'; 
uB = unique(gB_idx(:))';

% Side-by-side positions per class
offset  = 0.18;
posAll  = (uA - offset);
posBest = (uB + offset);

figure('Name','Error by TRUE Range — ALL vs BEST-5 (Ions 1–3)'); hold on;
set(gcf,'Units','centimeters','Position',[5 5 18 12]);

colorAll  = [0.00 0.20 0.40]; % dark blue
colorBest = [0.80 0.10 0.10]; % dark red

% Plot ALL (left boxes)
if ~isempty(gA_idx)
    boxplot(errAll(maskA), gA_idx, ...
        'Positions', posAll, 'Colors', colorAll, 'Widths', 0.22, 'Symbol','+');
end

% Plot BEST-5 (right boxes)
if ~isempty(gB_idx)
    boxplot(errBest(maskB), gB_idx, ...
        'Positions', posBest, 'Colors', colorBest, 'Widths', 0.22, 'Symbol','+');
end

% Styling (applies same linewidth to both sets). boxplot tags its whisker
% lines 'Upper Whisker'/'Lower Whisker' and its caps 'Upper Adjacent Value'/
% 'Lower Adjacent Value'; the tags 'Whisker' and 'Cap' match nothing.
boxTags = {'Box','Median','Upper Whisker','Lower Whisker', ...
           'Upper Adjacent Value','Lower Adjacent Value'};
for iTag = 1:numel(boxTags)
    set(findobj(gca,'Tag',boxTags{iTag}), 'LineWidth', 2);
end

% Axes/labels (set after boxplot, which rewrites ticks and limits)
xlim([0.5 4.5]);
xticks(1:4);
xticklabels(rangeLabels);
ylabel('Absolute % Error');
ax = gca; ax.YLimitMethod = "padded"; ax.FontSize = 15; ax.LineWidth = 2;
ylim([-5 85]); box off;

% Legend (dummy lines)
h1 = plot(nan,nan,'-','Color',colorAll,  'LineWidth',6);
h2 = plot(nan,nan,'-','Color',colorBest, 'LineWidth',6);
legend([h1 h2], {'ALL folds','BEST-5'}, 'Location','northwest'); legend boxoff;

%% ====== COMBINED PER-ION BOX PLOT: ALL vs BEST-5 in one figure ======
if ~exist('ionsUse','var'), ionsUse = 1:3; end
nIons = numel(ionsUse);

% Build per-ion error vectors (ALL): finite, non-negative errors only
errByIon_ALL = cell(1,nIons);      % cell() already fills every entry with []
for s = 1:size(absError,1)
    for f = 1:size(absError,2)
        E = absError{s,f};
        if isempty(E), continue; end
        for k = 1:nIons
            e = E(:, ionsUse(k));
            m = isfinite(e) & (e >= 0);
            errByIon_ALL{k} = [errByIon_ALL{k}; e(m)];
        end
    end
end

% Build per-ion error vectors (BEST-5)
errByIon_BST = cell(1,nIons);
if exist('best5','var') && ~isempty(best5)
    for i = 1:size(best5,1)
        s = best5(i,1); f = best5(i,2);
        E = absError{s,f};
        if isempty(E), continue; end
        for k = 1:nIons
            e = E(:, ionsUse(k));
            m = isfinite(e) & (e >= 0);
            errByIon_BST{k} = [errByIon_BST{k}; e(m)];
        end
    end
else
    warning('best5 not found. Only ALL folds will be plotted.');
end

% Flatten to vectors with group indices (k = position of the ion in ionsUse)
valsAll = []; grpAll = [];
valsBest = []; grpBest = [];
for k = 1:nIons
    valsAll  = [valsAll;  errByIon_ALL{k}(:)];
    grpAll   = [grpAll;   k*ones(numel(errByIon_ALL{k}),1)];
    valsBest = [valsBest; errByIon_BST{k}(:)];
    grpBest  = [grpBest;  k*ones(numel(errByIon_BST{k}),1)];
end

% Positions for grouped boxes around the integer ticks. boxplot needs one
% position per group that actually occurs, so an ion without any valid error
% must not get a position (otherwise the position count does not match).
offset  = 0.18;
posAll  = unique(grpAll)'  - offset;
posBest = unique(grpBest)' + offset;

figure('Name','Per-Ion % Error — ALL vs BEST-5'); hold on;
colorAll  = [0.00 0.20 0.40]; % ALL: dark blue
colorBest = [0.80 0.10 0.10]; % BEST-5: dark red

% Plot BOTH sets on the same axes
if ~isempty(valsAll)
    boxplot(valsAll, grpAll, 'Positions', posAll, 'Colors', colorAll, ...
            'Widths', 0.22);
end
if ~isempty(valsBest)
    boxplot(valsBest, grpBest, 'Positions', posBest, 'Colors', colorBest, ...
            'Widths', 0.22);
end

% Styling. boxplot tags its whisker lines 'Upper Whisker'/'Lower Whisker' and
% its caps 'Upper Adjacent Value'/'Lower Adjacent Value'; the tags 'Whisker'
% and 'Cap' match nothing.
boxTags = {'Box','Median','Upper Whisker','Lower Whisker', ...
           'Upper Adjacent Value','Lower Adjacent Value'};
for iTag = 1:numel(boxTags)
    set(findobj(gca,'Tag',boxTags{iTag}), 'LineWidth', 2);
end

% Axes/labels (set after boxplot, which rewrites ticks and limits)
xticks(1:nIons);
xticklabels(arrayfun(@(k)sprintf('Ion %d',k), ionsUse, 'UniformOutput', false));
ylabel('Absolute % Error');
set(gcf,'Units','centimeters','Position',[5 5 16 12]);
ax = gca; ax.YLimitMethod="padded"; ax.FontSize=15; ax.LineWidth=1.5;
ylim([-5 65]); box off;
xlim([0.5 nIons + 0.5]);           % [0.5 3.5] for three ions
% Legend via dummy lines
h1 = plot(nan,nan,'-','Color',colorAll,  'LineWidth',6);
h2 = plot(nan,nan,'-','Color',colorBest, 'LineWidth',6);
legend([h1 h2], {'ALL folds','BEST-5'}, 'Location','northwest'); legend boxoff;

